{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "d85f147e",
   "metadata": {},
   "source": [
    "# 数据处理\n",
    "先转换为单题的单个字符一列的excel；只提取出有效值；例如frame\\\":{\\\"successRate\\\":,\\\"minJumps\\\":,\\\"jumps\\，这些说明字段删去；\n",
    "# 数据分析\n",
    "1、每个题的平均作答时长；——结果数据<br>\n",
    "\n",
    "2、每个题的编码种类（有多少种，分别是什么，每种多少学生）；——结果数据<br>\n",
    "\n",
    "3、每个题的每个操作步骤的平均作答时长；——过程数据<br>\n",
    "\n",
    "4、正确率（暂未提供标准编码，可以探索一下题目本身，协助形成标准答案编码）；——结果数据<br>\n",
    "\n",
    "5、关键节点（通过数据，探索学生在从初始状态向终止状态进行的过程中，有几个关键步骤，每个关键步骤有几种类型的关键节点编码），体现“用数据说话”去探索关键节点。——过程数据<br>\n",
    "【以上仅供参考，希望包含，但不限于此】\n",
    "\n",
    "答案解析：一个frame字段是一个题。\n",
    "每个题目的记录方式不同，请用demo账号登录题目，页面最上方点击打开答案，可以看到该题目的每个步骤点击后出现的答案数据，也可以直接与命题人沟通询问具体每个答案的内涵。\n",
    "\n",
    "\n",
    "[\"{\\\"frame\\\":{\\\"successRate\\\":1,\\\"minJumps\\\":2,\\\"jumps\\\":2,\\\"path\\\":[1,3,2]}}\",\"{\\\"frame\\\":{\\\"successRate\\\":1,\\\"minJumps\\\":2,\\\"jumps\\\":2,\\\"path\\\":[3,1,0]}}\",\"{\\\"frame\\\":[\\\"00\\\",\\\"01\\\",\\\"02\\\",\\\"06\\\"]}\",\"{\\\"frame\\\":[\\\"00\\\",\\\"01\\\",\\\"02\\\",\\\"03\\\"]}\",\"{\\\"frame\\\":[[0,0,0,0,0,0,0],[1,0,1,0,0,0,0],[0,0,0,0,0,0,0],[0,0,0,0,0,0,0],[0,0,1,0,0,0,1]]}\",\"{\\\"frame\\\":[\\\"C_B\\\"]}\",\"{\\\"frame\\\":[\\\"C_G\\\",\\\"F_G\\\"]}\",\"{\\\"frame\\\":[\\\"09_02\\\"]}\",\"{\\\"frame\\\":[\\\"v_10_v_2\\\"]}\",\"{\\\"frame\\\":[]}\",\"{\\\"frame\\\":[]}\",\"{\\\"frame\\\":[[[2,1],[3,1],[4,1],[4,2]]]}\",\"{\\\"frame\\\":[[{\\\"row\\\":0,\\\"col\\\":0}]]}\",\"{\\\"frame\\\":[[{\\\"row\\\":1,\\\"col\\\":0}]]}\",\"{\\\"frame\\\":{\\\"stamps\\\":[2,1,0],\\\"start\\\":1,\\\"end\\\":16,\\\"selected\\\":true}}\",\"{\\\"frame\\\":{\\\"stamps\\\":[2,1,0],\\\"start\\\":1,\\\"end\\\":5,\\\"selected\\\":true}}\",\"{\\\"frame\\\":{\\\"stamps\\\":[2,1,0,1],\\\"start\\\":5,\\\"end\\\":5,\\\"selected\\\":true}}\",\"{\\\"frame\\\":{\\\"rotation\\\":[[0,1,0]],\\\"lowered\\\":null}}\",\"{\\\"frame\\\":{\\\"rotation\\\":[[0,1,0,1,3,2]],\\\"lowered\\\":null}}\",\"{\\\"frame\\\":{\\\"rotation\\\":[[null,null,2,0,2,3],[0,2,2,null,3,null],[null,0,null,null,1,null],[0,2,0,1,null,null]],\\\"lowered\\\":null}}\",\"{\\\"frame\\\":{\\\"sequence\\\":[0,0,0,0,0,1,1],\\\"transformation\\\":[[0],[0,1]]}}\",\"{\\\"frame\\\":{\\\"sequence\\\":[0,0,0],\\\"transformation\\\":[[0,1],[1,0]]}}\",\"{\\\"frame\\\":[4,5]}\",\"{\\\"frame\\\":[0,1,2,2,0,0]}\"]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e73e9beb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "        <script type=\"text/javascript\">\n",
       "        window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
       "        if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
       "        if (typeof require !== 'undefined') {\n",
       "        require.undef(\"plotly\");\n",
       "        requirejs.config({\n",
       "            paths: {\n",
       "                'plotly': ['https://cdn.plot.ly/plotly-2.2.0.min']\n",
       "            }\n",
       "        });\n",
       "        require(['plotly'], function(Plotly) {\n",
       "            window._Plotly = Plotly;\n",
       "        });\n",
       "        }\n",
       "        </script>\n",
       "        "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import json \n",
    "import numpy as np\n",
    "import ast\n",
    "from datetime import datetime\n",
    "import plotly as py\n",
    "import plotly.graph_objs as go\n",
    "from plotly.offline import plot\n",
    "from IPython.core.display import HTML\n",
    "import plotly.offline as offline\n",
    "offline.init_notebook_mode(connected=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "b62b3f15",
   "metadata": {},
   "outputs": [],
   "source": [
    "class data_analysis:\n",
    "    \"\"\"Decode per-student answer records and summarise them per problem.\n",
    "\n",
    "    The constructor adds two columns to the DataFrame it receives (in place):\n",
    "    ``ans``, the list of decoded ``frame`` payloads of each row, and\n",
    "    ``interval``, the seconds between ``start_time`` and ``stop_time``\n",
    "    (``-1`` when ``stop_time`` is missing).\n",
    "\n",
    "    Attributes:\n",
    "        df: the input frame, extended with ``ans`` and ``interval``.\n",
    "        row_num: number of rows of ``df``.\n",
    "        problem_num: number of problems that are grouped and plotted.\n",
    "        ndf: one row per student, one column per problem.\n",
    "        ndf_list: ``ndf`` split into single-problem DataFrames.\n",
    "        group_list: per-problem groupby over the stringified answers.\n",
    "    \"\"\"\n",
    "\n",
    "    # Layout of the start_time / stop_time strings.\n",
    "    TIME_FORMAT = '%Y-%m-%dT%H:%M:%S+08:00'\n",
    "\n",
    "    def __init__(self, df, problem_num=23) -> None:\n",
    "        \"\"\"Build every derived table from ``df``.\n",
    "\n",
    "        Args:\n",
    "            df: frame with ``task_answers``, ``start_time`` and ``stop_time``\n",
    "                columns. It is modified in place.\n",
    "            problem_num: how many leading problems to group; ``None`` means\n",
    "                every problem column found in the data.\n",
    "        \"\"\"\n",
    "        self.df = df\n",
    "        self.row_num = len(df)\n",
    "        # Column assignment (unlike DataFrame.insert) does not raise when the\n",
    "        # column already exists, so the same frame can be analysed again.\n",
    "        self.df['ans'] = pd.Series(self.remove_str(), index=self.df.index, dtype=object)\n",
    "        self.df['interval'] = self.get_interval()\n",
    "        self.ndf = pd.DataFrame(self.create_new_df())\n",
    "        self.problem_num = len(self.ndf.columns) if problem_num is None else problem_num\n",
    "        self.ndf_list = self.divide_ndf()\n",
    "        self.group_list = self.group_by()\n",
    "        print('init complete')\n",
    "\n",
    "    def remove_str_per_row(self, data_per_row):\n",
    "        \"\"\"Decode one ``task_answers`` cell.\n",
    "\n",
    "        The cell is the text form of a list whose items are JSON documents.\n",
    "\n",
    "        Returns:\n",
    "            list: one decoded JSON object per item.\n",
    "        \"\"\"\n",
    "        return [json.loads(frame) for frame in ast.literal_eval(data_per_row)]\n",
    "\n",
    "    def remove_str(self):\n",
    "        \"\"\"Return, for every row, the list of ``frame`` payloads of its answers.\"\"\"\n",
    "        return [\n",
    "            [answer['frame'] for answer in self.remove_str_per_row(raw)]\n",
    "            for raw in self.df['task_answers']\n",
    "        ]\n",
    "\n",
    "    def get_interval(self):\n",
    "        \"\"\"Return the elapsed seconds of every row, in row order.\"\"\"\n",
    "        return [self.get_interval_per_row(label) for label in self.df.index]\n",
    "\n",
    "    def get_interval_per_row(self, index):\n",
    "        \"\"\"Return the whole seconds between ``start_time`` and ``stop_time``.\n",
    "\n",
    "        Args:\n",
    "            index: index label of the row in ``self.df``.\n",
    "\n",
    "        Returns:\n",
    "            int: elapsed seconds, or ``-1`` when ``stop_time`` is missing.\n",
    "        \"\"\"\n",
    "        row_data = self.df.loc[index, :]\n",
    "        stop_time = row_data['stop_time']\n",
    "        if pd.isna(stop_time):\n",
    "            return -1\n",
    "        start_time = datetime.strptime(row_data['start_time'], self.TIME_FORMAT)\n",
    "        stop_time = datetime.strptime(stop_time, self.TIME_FORMAT)\n",
    "        # timedelta.seconds is only the sub-day remainder; total_seconds()\n",
    "        # also counts whole days.\n",
    "        return int((stop_time - start_time).total_seconds())\n",
    "\n",
    "    def create_new_df(self):\n",
    "        \"\"\"Return the ``ans`` column as a list of rows (one list per student).\"\"\"\n",
    "        return self.df['ans'].tolist()\n",
    "\n",
    "    def divide_ndf(self):\n",
    "        \"\"\"Split ``self.ndf`` into one single-column DataFrame per problem.\"\"\"\n",
    "        return [self.ndf[[column]].copy() for column in self.ndf.columns]\n",
    "\n",
    "    def group_by_per_problem(self, index):\n",
    "        \"\"\"Group the answers of problem ``index`` by their string encoding.\n",
    "\n",
    "        Adds (or refreshes) an ``ans_str`` column on ``self.ndf_list[index]``.\n",
    "\n",
    "        Returns:\n",
    "            DataFrameGroupBy: one group per distinct answer encoding.\n",
    "        \"\"\"\n",
    "        df_temp = self.ndf_list[index]\n",
    "        df_temp['ans_str'] = [self.content_to_str(answer) for answer in df_temp.iloc[:, 0]]\n",
    "        return df_temp.groupby('ans_str')\n",
    "\n",
    "    def content_to_str(self, data):\n",
    "        \"\"\"Encode one answer as a string usable as a grouping key.\n",
    "\n",
    "        A missing answer (``None`` or NaN) becomes ``'None'``; a dict wrapped\n",
    "        as ``{'data': ...}`` is unwrapped before encoding.\n",
    "        \"\"\"\n",
    "        if data is None or (isinstance(data, float) and data != data):\n",
    "            return str(None)\n",
    "        if isinstance(data, dict) and 'data' in data:\n",
    "            return self.data_to_str(data['data'])\n",
    "        return self.data_to_str(data)\n",
    "\n",
    "    def data_to_str(self, data):\n",
    "        \"\"\"Return ``str`` of a dict's values (keys dropped) or of ``data`` itself.\"\"\"\n",
    "        if isinstance(data, dict):\n",
    "            return str(list(data.values()))\n",
    "        return str(data)\n",
    "\n",
    "    def group_by(self):\n",
    "        \"\"\"Return the groupings of the first ``problem_num`` problems.\"\"\"\n",
    "        return [self.group_by_per_problem(i) for i in range(self.problem_num)]\n",
    "\n",
    "    def _save_and_show(self, data, title, xaxis_title, yaxis_title, filename):\n",
    "        \"\"\"Build a figure from ``data``, write it to ``filename`` and show it inline.\"\"\"\n",
    "        import os\n",
    "\n",
    "        layout = {\n",
    "            'title': title,\n",
    "            'xaxis_title': xaxis_title,\n",
    "            'yaxis_title': yaxis_title,\n",
    "            # tilt the x-axis tick labels by 60 degrees\n",
    "            'xaxis': {'tickangle': 60},\n",
    "        }\n",
    "        fig = go.Figure(data=data, layout=layout)\n",
    "        # Make sure the output directory exists before the file is written.\n",
    "        os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)\n",
    "        plot(fig, filename=filename, auto_open=False, image='png', image_height=800, image_width=1500)\n",
    "        offline.iplot(fig)\n",
    "\n",
    "    def plot(self):\n",
    "        \"\"\"Plot the histogram of the ``interval`` column (seconds). Returns 0.\"\"\"\n",
    "        data = [go.Histogram(x=list(self.df.loc[:, 'interval']))]\n",
    "        self._save_and_show(data, '学生用时分布', '学生用时，单位秒', '学生个数', './plot/vector.html')\n",
    "        return 0\n",
    "\n",
    "    def plot_problem(self):\n",
    "        \"\"\"Plot the number of distinct answer encodings per problem. Returns 0.\"\"\"\n",
    "        counts = [group.ngroups for group in self.group_list]\n",
    "        data = [go.Bar(x=list(range(self.problem_num)), y=counts)]\n",
    "        # Written to its own file so it no longer overwrites the histogram saved by plot().\n",
    "        self._save_and_show(data, '不同题目的编码数量', '题目编号', '编码个数', './plot/problem_codes.html')\n",
    "        return 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3b99c9a0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "init complete\n"
     ]
    }
   ],
   "source": [
    "# data_analysis() adds the 'ans' and 'interval' columns to df in place.\n",
    "df = pd.read_excel('./data/data.xlsx')\n",
    "data_entity = data_analysis(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d2b2fd91",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the rows ordered by school; a sorted copy is displayed, df itself is unchanged.\n",
    "df.sort_values(by='school')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9cc5ada7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the rows of each school as a separate DataFrame, in group-key order.\n",
    "grouped = df.groupby('school')\n",
    "df_l = [school_rows for _, school_rows in grouped]\n",
    "df_l[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abeddddf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the rows labelled 0 through 266 (label slices include the end) to junior.xlsx.\n",
    "# NOTE(review): the 266/267 split point is hard-coded; confirm it matches the school grouping.\n",
    "df.loc[0:266,:].to_excel('./data/junior.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b410779",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the rows labelled 267 and above to senior.xlsx.\n",
    "# NOTE(review): the 266/267 split point is hard-coded; confirm it matches the school grouping.\n",
    "df.loc[267:, :].to_excel('./data/senior.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "79639e93",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_junior = pd.read_excel('./data/junior.xlsx')\n",
    "df_junior"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0fb1a77f",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_senior = pd.read_excel('./data/senior.xlsx')\n",
    "df_senior"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1931a690",
   "metadata": {},
   "outputs": [],
   "source": [
    "data_entity.df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "02a7af39",
   "metadata": {},
   "source": [
    "# 数据处理\n",
    "处理函数定义在类data_analysis中的子函数remove_str()中\n",
    "移除每一行中第8列中的frame\\\":{\\\"successRate\\\":,\\\"minJumps\\\":,\\\"jumps\\，这些说明字段删去；"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3811d24c",
   "metadata": {},
   "outputs": [],
   "source": [
    "index = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ccf75ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Decode the answers of the row chosen by `index` (set in the previous cell).\n",
    "# The comprehension uses its own loop name so `index` is not overwritten and\n",
    "# re-running this cell keeps reading the same row.\n",
    "data_per_row = df.iloc[index,8]\n",
    "frame_list = ast.literal_eval(data_per_row)\n",
    "frame_dic_list = [json.loads(frame) for frame in frame_list]\n",
    "frame_dic_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5c756d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Small frame whose cells are dicts, to check that dict cells can be indexed by key.\n",
    "# The second cell of each row is now a dict; it used to be the set literal {2,'df'}.\n",
    "test_df = pd.DataFrame([[{1: 'ds'}, {2: 'df'}], [{1: 'ds'}, {2: 'df'}]])\n",
    "test_df.iloc[0,0][1]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1c91c870",
   "metadata": {},
   "source": [
    "最后将经过处理的结构体的列表存储到新的列中，新属性的名称为‘ans’，注意这一步操作已经在初始化函数中完成"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "401c1aa8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 'ans' already holds the unwrapped frame payloads (remove_str takes dic['frame']),\n",
    "# so the entry is used directly; indexing it with 'frame' again would fail.\n",
    "dic_temp = data_entity.df.loc[0,'ans'][20]\n",
    "dic_temp "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d2cc2d06",
   "metadata": {},
   "source": [
    "# 添加列（时长），单位s\n",
    "新增属性'interval'\n",
    "\n",
    "每一行的实例数据为："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f88915b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "row_data = data_entity.df.loc[0,:]\n",
    "row_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c95bb1c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Parse the timestamps of the example row and compute its duration in seconds.\n",
    "start_time = row_data['start_time']\n",
    "start_time = datetime.strptime(start_time,\"%Y-%m-%dT%H:%M:%S+08:00\")\n",
    "\n",
    "expire_time = row_data['expire_time']\n",
    "expire_time = datetime.strptime(expire_time,\"%Y-%m-%dT%H:%M:%S+08:00\")\n",
    "\n",
    "stop_time = row_data['stop_time']\n",
    "stop_time = datetime.strptime(stop_time,\"%Y-%m-%dT%H:%M:%S+08:00\")\n",
    "\n",
    "# total_seconds() also counts whole days; timedelta.seconds is only the sub-day remainder.\n",
    "total_sec = int((stop_time - start_time).total_seconds())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3717094e",
   "metadata": {},
   "outputs": [],
   "source": [
    "total_sec"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "037fdfff",
   "metadata": {},
   "source": [
    "基于以上思路的代码实现在init函数中执行（self.get_interval）"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "50977b2b",
   "metadata": {},
   "source": [
    "## 所有答卷的完成时间的分布图如下"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "897431f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "data_entity.plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dbf98c6d",
   "metadata": {},
   "source": [
    "# 新建关键字是题目的表\n",
    "（总共23个题目）"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b189e621",
   "metadata": {},
   "source": [
    "每一行中的‘ans’属性为一个固定长度（23）的列表，该列表中的元素格式为词典，现在建立一个新的dataframe，共662行，23列，每列对应同一个题目。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2d899862",
   "metadata": {},
   "outputs": [],
   "source": [
    "ndf = data_entity.ndf"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0a88ae59",
   "metadata": {},
   "source": [
    "将ndf切分成23份并提取关键信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "57807177",
   "metadata": {},
   "outputs": [],
   "source": [
    "ndf0 = ndf.loc[:,0]\n",
    "ndf0_df = pd.DataFrame(ndf0)\n",
    "ndf0_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a73a186",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): this rebinds ndf0 (column 0 of ndf in the cell above) to the whole\n",
    "# data_entity object; it looks like an unfinished statement -- confirm what was intended.\n",
    "ndf0 = data_entity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3202fece",
   "metadata": {},
   "outputs": [],
   "source": [
    "a = list(data_entity.ndf_list[22].iloc[:,0])\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d1a3ff9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Encode every answer of every problem as a string.\n",
    "# The class method is used so this cell does not depend on the stand-alone\n",
    "# content_to_str(), which is only defined in a later cell.\n",
    "for df_temp in data_entity.ndf_list:\n",
    "    df_str_list = [data_entity.content_to_str(answer) for answer in df_temp.iloc[:, 0]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f4dc894",
   "metadata": {},
   "outputs": [],
   "source": [
    "def content_to_str(data):\n",
    "    \"\"\"Encode one answer as a string usable as a grouping key.\n",
    "\n",
    "    Args:\n",
    "        data: a decoded answer (dict, list, scalar) or a missing value.\n",
    "\n",
    "    Returns:\n",
    "        str: ``'None'`` for ``None``/NaN; otherwise the encoding produced by\n",
    "        ``data_to_str``, applied to ``data['data']`` when ``data`` is a dict\n",
    "        with a ``'data'`` key and to ``data`` itself in every other case.\n",
    "    \"\"\"\n",
    "    if data is None or (isinstance(data, float) and data != data):\n",
    "        return str(None)\n",
    "    # Only dicts can carry the 'data' wrapper; lists and scalars are encoded as they are.\n",
    "    if isinstance(data, dict) and 'data' in data:\n",
    "        return data_to_str(data['data'])\n",
    "    return data_to_str(data)\n",
    "\n",
    "\n",
    "def data_to_str(data):\n",
    "    \"\"\"Return ``str`` of a dict's values (keys dropped) or of ``data`` itself.\"\"\"\n",
    "    if isinstance(data, dict):\n",
    "        return str(list(data.values()))\n",
    "    return str(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4a1c59f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "test = {'sequence': [1, 1, 0, 0, 1, 0, 1],\n",
    "   'transformation': [[0], [1, 0]]}\n",
    "test1 = [[0, 0, 0, 0, 0, 0, 0],\n",
    "   [1, 0, 1, 0, 0, 0, 0],\n",
    "   [0, 0, 0, 0, 0, 0, 0],\n",
    "   [0, 0, 0, 0, 0, 0, 0],\n",
    "   [1, 0, 0, 0, 0, 0, 0]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a9d3cee",
   "metadata": {},
   "outputs": [],
   "source": [
    "type(test) == type({})\n",
    "str(list(test.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8f656032",
   "metadata": {},
   "outputs": [],
   "source": [
    "test1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af9fc0ef",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "31c73f85",
   "metadata": {},
   "source": [
    "题目的字典有三种格式：\n",
    "1、ndf0.iloc[66,0] == None\n",
    "\n",
    "2、ndf0.iloc[0,0]\n",
    "{'data': {'successRate': 1, 'minJumps': 2, 'jumps': 2, 'path': [2, 0, 1]}}\n",
    "\n",
    "3、ndf0.iloc[7,0]\n",
    "{'successRate': 0, 'minJumps': 2, 'jumps': 3, 'path': [1, 2, 3, 4]}\n",
    "\n",
    "题目之间的data格式不一样：\n",
    "0，1为：{'successRate': 1, 'minJumps': 2, 'jumps': 2, 'path': [2, 0, 1]}\n",
    "\n",
    "2，3为：['00', '01', '02']\n",
    "\n",
    "4为：[[0, 0, 0, 0, 0, 0, 0],\n",
    "   [1, 0, 1, 0, 0, 0, 0],\n",
    "   [0, 0, 0, 0, 0, 0, 0],\n",
    "   [0, 0, 0, 0, 0, 0, 0],\n",
    "   [1, 0, 0, 0, 0, 0, 0]]\n",
    "   \n",
    "5为：['B_A', 'C_A']\n",
    "\n",
    "6为：['B_A', 'C_A', 'G_F', 'D_B', 'E_B']\n",
    "\n",
    "7为：[[[0, 1], [0, 2], [1, 2]],\n",
    "   [[0, 4], [0, 5], [1, 5]],\n",
    "   [[0, 6], [0, 7], [1, 7]],\n",
    "   [[0, 10], [0, 11], [1, 11]]]\n",
    "   \n",
    "8为：[[[1, 0], [2, 0], [3, 0], [3, 1]],\n",
    "   [[2, 3], [3, 3], [4, 3], [4, 4]],\n",
    "   [[0, 4], [1, 4], [2, 4], [2, 5]],\n",
    "   [[0, 1], [1, 1], [2, 1], [2, 2]],\n",
    "   [[3, 2], [4, 2], [5, 2], [5, 3]]]\n",
    "   \n",
    "9为['09_02', '05_06']\n",
    "\n",
    "10为['v_14_v_6', 'v_2_v_16', 'v_15_v_12']\n",
    "\n",
    "11，12为[[{'row': 0, 'col': 0},\n",
    "    {'row': 1, 'col': 2},\n",
    "    {'row': 2, 'col': 1},\n",
    "    {'row': 0, 'col': 2},\n",
    "    {'row': 2, 'col': 4}]]\n",
    "    \n",
    "13为 [2, 1]\n",
    "\n",
    "14为[1, 2, 1, 2, 2, 3]\n",
    "\n",
    "15为{'stamps': [1, 0, 1, 2], 'start': 1, 'end': 16, 'selected': True}\n",
    "\n",
    "16为{'stamps': [2, 1, 0, 1, 0],\n",
    "   'start': 1,\n",
    "   'end': 5,\n",
    "   'selected': False}\n",
    "   \n",
    "17为{'stamps': [2, 1, 0, 1], 'start': 1, 'end': 5, 'selected': False}\n",
    "\n",
    "18为{'rotation': [[1, 0, 1]], 'lowered': [0, 0]}\n",
    "\n",
    " {'rotation': [[2, 2, 0]], 'lowered': None}\n",
    " \n",
    "19为{'rotation': [[0, 0, 1, 3, 0, 1]], 'lowered': None}\n",
    "\n",
    " {'rotation': [[2, 3, 1, 0, 0, 1]], 'lowered': [0, 2]}\n",
    " \n",
    "20为{'rotation': [[None, None, 1, 1, 1, 0],\n",
    "    [1, 1, 3, None, 0, None],\n",
    "    [None, 1, None, None, 0, None],\n",
    "    [1, 1, 1, 0, None, None]],\n",
    "   'lowered': None}\n",
    "   \n",
    "21为{'sequence': [1, 0, 1, 0, 0, 1, 1],\n",
    "\n",
    "   'transformation': [[0], [0, 1]]}\n",
    "   \n",
    "22为{'sequence': [1, 1, 0, 0, 1, 0, 1],\n",
    "\n",
    "   'transformation': [[0], [1, 0]]}\n",
    "   "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "487bdd9c",
   "metadata": {},
   "source": [
    "## 每道题编码种类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "132446f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ndf0_group = ndf0_df.groupby(0)\n",
    "# list(ndf0_group)\n",
    "gl = data_entity.group_list\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24d6b280",
   "metadata": {},
   "outputs": [],
   "source": [
    "list(gl[4])[1][1]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d84f4c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): `g1` is not defined in any cell shown here (only `gl`, taken from\n",
    "# data_entity.group_list, is), so this presumably raises NameError on a fresh run -- confirm intent.\n",
    "g1.loc[0,1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8526399f",
   "metadata": {},
   "outputs": [],
   "source": [
    "len(list(gl[6]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9add9fee",
   "metadata": {},
   "outputs": [],
   "source": [
    "data_entity.plot_problem()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "be338894",
   "metadata": {},
   "source": [
    "基于上述求解思路，编写self.group_by()函数以得出各个题目的编码数量"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7ba1a20b",
   "metadata": {},
   "source": [
    "## 每道题正确率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b9fd1d2b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
